Hi, decided to call the data set Food_Sec, have loaded the entire data set and would then subset it to the columns we are working on.

There was one Raw_Data.csv file aswell that had done this already but that didn’t have all the observations

getwd() install.packages(“epiR”) ## Installng tidyverse to use dplyr to rename the columns install.packages(‘tidyverse’)

Food_Sec <- data.frame(read.csv("dec21pub.csv"))
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ezids)
library(ggplot2)
library(epiR)
## Loading required package: survival
## Package epiR 2.0.52 is loaded
## Type help(epi.about) for summary information
## Type browseVignettes(package = 'epiR') to learn how to use epiR for applied epidemiological analyses
## 
FS_Subset <- subset(Food_Sec, HRINTSTA == 001 & HRSUPINT == 001 & HRFS12MD != -9)
FS_Subset <- subset(FS_Subset, select = c("HRHHID", "GESTFIPS", "HRNUMHOU", "HEFAMINC", "HESP1",    "PTDTRACE", "PRCITSHP", "PEMJNUM",  "PEHRUSL1", "PEEDUCA", "HRFS12MD", "PRNMCHLD"))

FS_Subset <- FS_Subset %>% rename("Id" = "HRHHID", "States" = "GESTFIPS", "Family_Size" = "HRNUMHOU",   "Household_Income" = "HEFAMINC",    "SNAP" = "HESP1",   "Ethnicity" =   "PTDTRACE", "Citizenship_status" = "PRCITSHP",  "Number_of_Jobs" = "PEMJNUM",   "Hours_on_Jobs" = "PEHRUSL1" , "Education_Level" = "PEEDUCA" , "FoodSecurity_score" = "HRFS12MD")

## Converting the all the columns to factors as they are all ordinal(except the Id, but since it's categorical i'm converting it into a factor too)


FS_Subset[] <- lapply( FS_Subset, factor)

str(FS_Subset)
## 'data.frame':    71472 obs. of  12 variables:
##  $ Id                : Factor w/ 27922 levels "5185410966","8178510165",..: 16600 9378 9378 8472 8472 7861 7861 19375 19375 24604 ...
##  $ States            : Factor w/ 51 levels "1","2","4","5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Family_Size       : Factor w/ 14 levels "1","2","3","4",..: 1 2 2 2 2 2 2 2 2 1 ...
##  $ Household_Income  : Factor w/ 16 levels "1","2","3","4",..: 16 14 14 12 12 13 13 9 9 11 ...
##  $ SNAP              : Factor w/ 5 levels "-3","-2","-1",..: 3 3 3 5 5 3 3 5 5 3 ...
##  $ Ethnicity         : Factor w/ 24 levels "1","2","3","4",..: 1 1 1 1 1 1 1 2 2 1 ...
##  $ Citizenship_status: Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Number_of_Jobs    : Factor w/ 4 levels "-1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Hours_on_Jobs     : Factor w/ 88 levels "-4","-1","0",..: 67 43 2 43 43 62 43 2 2 2 ...
##  $ Education_Level   : Factor w/ 17 levels "-1","31","32",..: 14 15 1 14 14 10 10 7 10 5 ...
##  $ FoodSecurity_score: Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 2 2 1 ...
##  $ PRNMCHLD          : Factor w/ 12 levels "0","1","2","3",..: 1 2 1 1 1 1 1 1 1 1 ...

##Exploring the Food Security Score

levels(FS_Subset$'FoodSecurity_score') <- c( "High Food Security", "Marginal Food Security", "Low Food Security", "Very Low Food Security")
summary(FS_Subset$'FoodSecurity_score')
##     High Food Security Marginal Food Security      Low Food Security 
##                  58901                   5442                   4820 
## Very Low Food Security 
##                   2309
FS_Subset$FS_Status <- FS_Subset$FoodSecurity_score

levels(FS_Subset$FS_Status) <- c( "Food Secure", "Food Secure", "Food Insecure", "Food Insecure")

##1 :High Food Security, 2 : Marginal Food Security , 3 : Low Food Security, 4 : Very Low Food Security ,-9 : No Response ## Children’s Food Security Scale variables are coded as “Not in Universe” (-1) if there were no children in the household. ## Attachment 17, on page 288, contains details about the food security score. I have only given it a cursory glace, but we will have to read it in detail.

ggplot(FS_Subset, aes(x= FoodSecurity_score,  y = ..prop.., group = 1)) + geom_bar( fill = "blue") +  geom_text(aes(label = round((..prop..), 2)), stat = "count", hjust = 0, colour = "black") + coord_flip()  + labs(x= " ", y= "Relative Frequency ") +  scale_y_continuous(limits = c(0, 1))

df <- FS_Subset %>% 
  group_by(FS_Status) %>% # Variable to be transformed
  count() %>% 
  ungroup() %>% 
  mutate(perc = `n` / sum(`n`)) %>% 
  arrange(perc) %>%
  mutate(labels = scales::percent(perc))

ggplot(df, aes(x = "", y = perc, fill = FS_Status)) +
  geom_col() +
    geom_text(aes(label = labels),
            position = position_stack(vjust = 0.5)) +
  coord_polar(theta = "y")

Renaming levels of Ethnicity and Citizenship_Status

levels(FS_Subset$'Ethnicity') <- c('White only', 'Black only', 'American Indian, Alaskan native only', 'Asian Only', 'Hawaiian', 'White-black', 'White-AI', 'White-Asian', 'White-HP', 'Black-AI', 'Black-Asian', 'Black-HP', 'AI-Asian', 'AI-HP', 'Asian-HP', 'W-B-AI', 'W-B-A', 'W-B-HP', 'W-AI-A', 'W-AI-HP', 'W-A-HP', 'B-AI-A', 'W-B-AL-A', 'W-AI-A-HP', 'Other 3 race combo', 'Other 4 and 5 race combo')
summary(FS_Subset$'Ethnicity', title = "PTDTRACE")
##                           White only                           Black only 
##                                57253                                 7173 
## American Indian, Alaskan native only                           Asian Only 
##                                  952                                 3989 
##                             Hawaiian                          White-black 
##                                  329                                  531 
##                             White-AI                          White-Asian 
##                                  437                                  380 
##                             White-HP                             Black-AI 
##                                   68                                   72 
##                          Black-Asian                             Black-HP 
##                                   33                                    4 
##                             AI-Asian                                AI-HP 
##                                    8                                    1 
##                             Asian-HP                               W-B-AI 
##                                   58                                   51 
##                                W-B-A                               W-B-HP 
##                                   16                                    7 
##                               W-AI-A                              W-AI-HP 
##                                   10                                    6 
##                               W-A-HP                               B-AI-A 
##                                   72                                    3 
##                             W-B-AL-A                            W-AI-A-HP 
##                                    1                                   18 
##                   Other 3 race combo             Other 4 and 5 race combo 
##                                    0                                    0
summary(FS_Subset$'SNAP', title = "HESP1")
##    -3    -2    -1     1     2 
##   198   156 45644  7208 18266
levels(FS_Subset$'Citizenship_status') <- c('NATIVE, BORN IN THE UNITED STATES', 'NATIVE, BORN IN PUERTO RICO OR OTHER U.S. ISLAND AREAS', 'NATIVE, BORN ABROAD OF AMERICAN PARENT OR PARENTS', 'FOREIGN BORN, U.S. CITIZEN BY NATURALIZATION', 'FOREIGN BORN, NOT A CITIZEN OF THE UNITED STATES')
summary(FS_Subset$'Citizenship_status', title = "PRCITSHP")
##                      NATIVE, BORN IN THE UNITED STATES 
##                                                  62819 
## NATIVE, BORN IN PUERTO RICO OR OTHER U.S. ISLAND AREAS 
##                                                    265 
##      NATIVE, BORN ABROAD OF AMERICAN PARENT OR PARENTS 
##                                                    523 
##           FOREIGN BORN, U.S. CITIZEN BY NATURALIZATION 
##                                                   4051 
##       FOREIGN BORN, NOT A CITIZEN OF THE UNITED STATES 
##                                                   3814

Plotting barcharts between all the levels of each factor and food security status

for(i in levels(FS_Subset$Ethnicity)){
   
  print(ggplot(subset(FS_Subset, Ethnicity == i ) , aes(x= Ethnicity, color =FoodSecurity_score, fill = FoodSecurity_score)) + geom_bar( aes(y = (..count..)/sum(..count..)*100), position = "dodge") + labs(x= " ", y= "Percentage "))

}

selected <- c('White only', 'Black only', 'American Indian, Alaskan native only', 'Asian Only', 'Hawaiian')

FS_ss <- FS_Subset[FS_Subset$Ethnicity %in% selected,]

FS_ss <- droplevels(FS_ss)


chi_test <- table(FS_ss$Ethnicity, FS_ss$FS_Status)
chisq.test(chi_test)
## 
##  Pearson's Chi-squared test
## 
## data:  chi_test
## X-squared = 996.87, df = 4, p-value < 2.2e-16
for(i in levels(FS_Subset$Citizenship_status)){
   
  print(ggplot(subset(FS_Subset, Citizenship_status == i ) , aes(x= Citizenship_status, color =FoodSecurity_score, fill = FoodSecurity_score)) + geom_bar( aes(y = (..count..)/sum(..count..)*100), position = "dodge")+ labs(x= " ", y= "Percentage "))
}

chi_test_CS <- table(FS_ss$Citizenship_status, FS_ss$FoodSecurity_score)
chisq.test(chi_test_CS)
## 
##  Pearson's Chi-squared test
## 
## data:  chi_test_CS
## X-squared = 441.47, df = 12, p-value < 2.2e-16
table(FS_Subset$SNAP)
## 
##    -3    -2    -1     1     2 
##   198   156 45644  7208 18266
## Only would be considering the levels 1 and 2. here -1, the one with most frequency, means that the response is not in universe. Ideally, I should substitute this but due to time constraints I won't be doing that. -3 and -2 mean refuse to respond and no response.


FS_SNAP <- subset(FS_Subset, FS_Subset$SNAP != -1)
FS_SNAP <- droplevels(FS_SNAP)

levels(FS_SNAP$SNAP) <- c('Refused', 'Dont Know', 'Uses SNAP', 'No SNAP')

ggplot(FS_SNAP, aes(x = SNAP, fill = FoodSecurity_score)) + geom_bar(position = "fill")

ggplot(FS_SNAP, aes(x = SNAP, fill = FS_Status)) + geom_bar(position = "fill")

chi_test_SNAP <- table(FS_SNAP$SNAP, FS_SNAP$FS_Status)
chi_test_SNAP
##            
##             Food Secure Food Insecure
##   Refused           193             5
##   Dont Know         139            17
##   Uses SNAP        4471          2737
##   No SNAP         14258          4008
chisq.test(chi_test_SNAP)
## 
##  Pearson's Chi-squared test
## 
## data:  chi_test_SNAP
## X-squared = 764.1, df = 3, p-value < 2.2e-16
table(FS_SNAP$SNAP)
## 
##   Refused Dont Know Uses SNAP   No SNAP 
##       198       156      7208     18266
SNAP_Odds <- droplevels(subset(FS_SNAP, SNAP != "Refused" & SNAP != "Dont Know"))

TAB <- table(SNAP_Odds$SNAP, SNAP_Odds$FS_Status)

barplot(TAB, beside= T, legend = T)

epi.2by2(TAB, method = "cohort.count")
##              Outcome +    Outcome -      Total        Inc risk *        Odds
## Exposed +         4471         2737       7208              62.0        1.63
## Exposed -        14258         4008      18266              78.1        3.56
## Total            18729         6745      25474              73.5        2.78
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Inc risk ratio                                 0.79 (0.78, 0.81)
## Odds ratio                                     0.46 (0.43, 0.49)
## Attrib risk in the exposed *                   -16.03 (-17.30, -14.76)
## Attrib fraction in the exposed (%)            -25.84 (-28.34, -23.40)
## Attrib risk in the population *                -4.54 (-5.34, -3.73)
## Attrib fraction in the population (%)         -6.17 (-6.68, -5.66)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 682.162 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval
##  * Outcomes per 100 population units
food_ave <- subset(FS_Subset, select = c("Id",  "Number_of_Jobs",   "Hours_on_Jobs",    "Education_Level", "FoodSecurity_score"))
head(food_ave)
##                 Id Number_of_Jobs Hours_on_Jobs Education_Level
## 1  404006407110031             -1            65              43
## 7  147240092351000             -1            40              44
## 8  147240092351000             -1            -1              -1
## 16 128450301231000             -1            40              43
## 17 128450301231000             -1            40              43
## 18 114580195861000             -1            60              39
##    FoodSecurity_score
## 1  High Food Security
## 7  High Food Security
## 8  High Food Security
## 16 High Food Security
## 17 High Food Security
## 18 High Food Security
str(food_ave)
## 'data.frame':    71472 obs. of  5 variables:
##  $ Id                : Factor w/ 27922 levels "5185410966","8178510165",..: 16600 9378 9378 8472 8472 7861 7861 19375 19375 24604 ...
##  $ Number_of_Jobs    : Factor w/ 4 levels "-1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Hours_on_Jobs     : Factor w/ 88 levels "-4","-1","0",..: 67 43 2 43 43 62 43 2 2 2 ...
##  $ Education_Level   : Factor w/ 17 levels "-1","31","32",..: 14 15 1 14 14 10 10 7 10 5 ...
##  $ FoodSecurity_score: Factor w/ 4 levels "High Food Security",..: 1 1 1 1 1 1 1 2 2 1 ...
food_ave$Number_of_Jobs = as.factor(food_ave$Number_of_Jobs)
food_ave$Education_Level = as.factor(food_ave$Education_Level)
food_ave$FoodSecurity_score = as.factor(food_ave$FoodSecurity_score)
food_ave$Hours_on_Jobs = as.numeric(food_ave$Hours_on_Jobs)
str(food_ave)
## 'data.frame':    71472 obs. of  5 variables:
##  $ Id                : Factor w/ 27922 levels "5185410966","8178510165",..: 16600 9378 9378 8472 8472 7861 7861 19375 19375 24604 ...
##  $ Number_of_Jobs    : Factor w/ 4 levels "-1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Hours_on_Jobs     : num  67 43 2 43 43 62 43 2 2 2 ...
##  $ Education_Level   : Factor w/ 17 levels "-1","31","32",..: 14 15 1 14 14 10 10 7 10 5 ...
##  $ FoodSecurity_score: Factor w/ 4 levels "High Food Security",..: 1 1 1 1 1 1 1 2 2 1 ...

Number of Jobs

plot(food_ave$Number_of_Jobs)

ggplot(food_ave) +
  geom_bar(aes(x = Number_of_Jobs, fill = FoodSecurity_score), position = "dodge") +
  xlab("Jobs") + ylab("Count")

ggplot(subset(food_ave, Number_of_Jobs == -1)) +
  geom_bar(aes(x = Number_of_Jobs, fill = FoodSecurity_score), position = "dodge") +
  xlab("Jobs") + ylab("Count")

ggplot(subset(food_ave, Number_of_Jobs == 2)) +
  geom_bar(aes(x = Number_of_Jobs, fill = FoodSecurity_score), position = "dodge") +
  xlab("Jobs") + ylab("Count")

ggplot(subset(food_ave, Number_of_Jobs == 3)) +
  geom_bar(aes(x = Number_of_Jobs, fill = FoodSecurity_score), position = "dodge") +
  xlab("Jobs") + ylab("Count")

ggplot(subset(food_ave, Number_of_Jobs == 4)) +
  geom_bar(aes(x = Number_of_Jobs, fill = FoodSecurity_score), position = "dodge") +
  xlab("Jobs") + ylab("Count")

Here * 1 High Food Security * 2 Marginal Food Security * 3 Low Food Security * 4 Very Low Food Security * -9 No Response

In the above graphs, people say that they have High Food Security irrespective of the number of jobs. But lets use Chi-Square test to see if they are really independent of each other

contable <- table(food_ave$Number_of_Jobs, food_ave$FoodSecurity_score)
xkabledply(contable)
Table
High Food Security Marginal Food Security Low Food Security Very Low Food Security
-1 57321 5318 4701 2244
2 1425 115 112 54
3 136 7 7 9
4 19 2 0 2
## Chi-Square Test 
chi_test <- chisq.test(contable, correct = F)
## Warning in chisq.test(contable, correct = F): Chi-squared approximation may be
## incorrect
chi_test
## 
##  Pearson's Chi-squared test
## 
## data:  contable
## X-squared = 12.429, df = 9, p-value = 0.1902

The result gave warnings as the estimated value for some cells are very low. From the test, we see that the P-value for the Chi-square test is 0.3871 which is greater than the default value 0.05. Hence we accept the null hypothesis and hence, Number of Jobs doesn’t significantly affect the Food Security.

###Education Level

edu_freq <- as.data.frame(table(food_ave$Education_Level))
edu_freq
##    Var1  Freq
## 1    -1 12558
## 2    31   144
## 3    32   251
## 4    33   446
## 5    34   912
## 6    35  1241
## 7    36  1538
## 8    37  1625
## 9    38   910
## 10   39 16004
## 11   40  9492
## 12   41  2454
## 13   42  3257
## 14   43 12871
## 15   44  5720
## 16   45   858
## 17   46  1191
plot(food_ave$Education_Level)

ggplot(food_ave) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Jobs") + ylab("Count")

The response is understood as follows:

31 LESS THAN 1ST GRADE 32 1ST, 2ND, 3RD OR 4TH GRADE 33 5TH OR 6TH GRADE 34 7TH OR 8TH GRADE 35 9TH GRADE 36 10TH GRADE 37 11TH GRADE 38 12TH GRADE NO DIPLOMA 39 HIGH SCHOOL GRAD-DIPLOMA OR EQUIV (GED) 40 SOME COLLEGE BUT NO DEGREE 41 ASSOCIATE DEGREE-OCCUPATIONAL/VOCATIONAL 42 ASSOCIATE DEGREE-ACADEMIC PROGRAM 43 BACHELOR’S DEGREE (EX: BA, AB, BS) 44 MASTER’S DEGREE (EX: MA, MS, MEng, MEd, MSW) 45 PROFESSIONAL SCHOOL DEG (EX: MD, DDS, DVM) 46 DOCTORATE DEGREE (EX: PhD, EdD)

ggplot(subset(food_ave, Education_Level == -1)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 31)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 32)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 33)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 34)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 35)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 36)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 37)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 38)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 39)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 40)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 41)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 42)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 43)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 44)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 45)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

ggplot(subset(food_ave, Education_Level == 46)) +
  geom_bar(aes(x = Education_Level, fill = FoodSecurity_score), position = "dodge") +
  xlab("Education") + ylab("Count")

contable_edu <- table(food_ave$Education_Level, food_ave$FoodSecurity_score)
xkabledply(contable_edu)
Table
High Food Security Marginal Food Security Low Food Security Very Low Food Security
-1 9703 1238 1199 418
31 95 20 17 12
32 158 36 41 16
33 276 55 82 33
34 621 111 129 51
35 894 141 134 72
36 1094 190 167 87
37 1168 182 178 97
38 639 106 120 45
39 12435 1575 1328 666
40 7751 744 626 371
41 2033 182 160 79
42 2773 215 162 107
43 11860 477 351 183
44 5416 140 108 56
45 828 17 7 6
46 1157 13 11 10
## Chi-Square Test 
chi_test <- chisq.test(contable_edu, correct = F)
## Warning in chisq.test(contable_edu, correct = F): Chi-squared approximation may
## be incorrect
chi_test
## 
##  Pearson's Chi-squared test
## 
## data:  contable_edu
## X-squared = 3136.8, df = 48, p-value < 2.2e-16

Education_Level is having a significant effect on the Food Security of People

##Hours_on_Jobs

plot(food_ave$Hours_on_Jobs)

ggplot(food_ave) +
  geom_bar(aes(x = Hours_on_Jobs, fill = FoodSecurity_score), position = "dodge") +
  xlab("Jobs") + ylab("Count")

ggplot(food_ave, aes(Hours_on_Jobs, fill = FoodSecurity_score)) + 
        geom_histogram(binwidth=5)

ggplot(food_ave, aes(Hours_on_Jobs)) + 
        geom_freqpoly(binwidth=1)

ggplot(food_ave, aes(Hours_on_Jobs, color= FoodSecurity_score)) + 
        geom_freqpoly(binwidth=25)

ggplot(food_ave, aes(Hours_on_Jobs, fill = FoodSecurity_score)) + 
        geom_histogram(binwidth=5) + 
        facet_wrap(~FoodSecurity_score)

vary_hours <- nrow(food_ave[food_ave$Hours_on_Jobs == -4,])
vary_hours
## [1] 0

There are 0 whose working hours vary.

summary(food_ave$Hours_on_Jobs)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    2.00    2.00   19.58   43.00   88.00
getmode <- function(v) {
   uniqv <- unique(v)
   uniqv[which.max(tabulate(match(v, uniqv)))]
}
getmode(food_ave$Hours_on_Jobs)
## [1] 2
#ggplot(food_ave, aes(x=FoodSecurity_score, y=Hours_on_Jobs)) + 
 # geom_boxplot( colour=c("red","blue","violet","navy blue","black"), outlier.shape=8, outlier.size=4) +
  #labs(title="Boxplot for Hours on Jobs", x="Control / Treatment", y = "Hours on Job")
h_anova <- aov(Hours_on_Jobs ~ FoodSecurity_score, data = food_ave)
h_tuskey <- TukeyHSD(h_anova)
summary(h_anova)
##                       Df   Sum Sq Mean Sq F value Pr(>F)    
## FoodSecurity_score     3   279623   93208   214.6 <2e-16 ***
## Residuals          71468 31045414     434                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
h_tuskey
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Hours_on_Jobs ~ FoodSecurity_score, data = food_ave)
## 
## $FoodSecurity_score
##                                                     diff       lwr        upr
## Marginal Food Security-High Food Security     -4.2140290 -4.972646 -3.4554123
## Low Food Security-High Food Security          -5.6863555 -6.488530 -4.8841810
## Very Low Food Security-High Food Security     -6.0702228 -7.206149 -4.9342962
## Low Food Security-Marginal Food Security      -1.4723264 -2.531399 -0.4132541
## Very Low Food Security-Marginal Food Security -1.8561937 -3.186036 -0.5263518
## Very Low Food Security-Low Food Security      -0.3838673 -1.739029  0.9712947
##                                                   p adj
## Marginal Food Security-High Food Security     0.0000000
## Low Food Security-High Food Security          0.0000000
## Very Low Food Security-High Food Security     0.0000000
## Low Food Security-Marginal Food Security      0.0020125
## Very Low Food Security-Marginal Food Security 0.0019070
## Very Low Food Security-Low Food Security      0.8860333

2 - 1, 3 -1, 4 -1, 3 -2, 4 -2 have significant difference in there mean. getmode(food_ave$Hours_on_Jobs)

#ggplot(food_ave, aes(x=FoodSecurity_score, y=Hours_on_Jobs)) + 
 # geom_boxplot( colour=c("red","blue","violet","navy blue","black"), outlier.shape=8, outlier.size=4) +
#  labs(title="Boxplot for Hours on Jobs", x="Control / Treatment", y = "Hours on Job")

There are 0 whose working hours vary. summary(food_ave$Hours_on_Jobs)

8657d5728a00f036f19d3ba04f8e0d67a4b3431f

-States, Family size, and Household Income

levels(FS_Subset$'States') <- c('AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY' )
summary(FS_Subset$'States', title = "GESTFIPS")
##   AL   AK   AZ   AR   CA   CO   CT   DE   DC   FL   GA   HI   ID   IL   IN   IA 
## 1207  970 1080 1258 6975  764  593  832 1208 2738 1421 1125 1304 2052 1265  884 
##   KS   KY   LA   ME   MD   MA   MI   MN   MS   MO   MT   NE   NV   NH   NJ   NM 
##  952  808 1606  564  963 1352 1762  999 1505 1099 1255  791 1007  994 1397 1242 
##   NY   NC   ND   OH   OK   OR   PA   RI   SC   SD   TN   TX   UT   VT   VA   WA 
## 2580 1503 1143 1681  985 1247 1928  647 1028  840 1452 3946 1306 1030 1301 1404 
##   WV   WI   WY 
## 1214 1092 1173

California has the highest number of respondents (6975), whereas Maine has the smallest number of respondents (564). In order to compare, I’m choosing states which has similar number of respondents. Alabama 1207 and Washington DC 1207, Florida 2738 and New York 2580, IL 2052 and PA 1928.

Plotting barcharts between all the levels of state and food security status

for(i in levels(FS_Subset$States)){
   
  print(ggplot(subset(FS_Subset, States == i ) , aes(x= States, color =FoodSecurity_score, fill = FoodSecurity_score)) + geom_bar( aes(y = (..count..)/sum(..count..)*100), position = "dodge"))
}

#library(ggplot2)
#qqplot(food_una$States, food_una$FoodSecurity_score, plot.it = TRUE,  xlab="States", ylab="Food security Score",  main="QQ Plot for Food security vs States")
#hist(x=States )

<<<<<<< HEAD

#food_una <- subset(FS_Subset, select = c("HRHHID", "GESTFIPS", "HRNUMHOU", "HEFAMINC", "HRFS12MD"))
food_una <- subset(FS_Subset, select = c("Id", "States","Family_Size","Household_Income","FoodSecurity_score", "PRNMCHLD"))
head(food_una)
##                 Id States Family_Size Household_Income FoodSecurity_score
## 1  404006407110031     AL           1               16 High Food Security
## 7  147240092351000     AL           2               14 High Food Security
## 8  147240092351000     AL           2               14 High Food Security
## 16 128450301231000     AL           2               12 High Food Security
## 17 128450301231000     AL           2               12 High Food Security
## 18 114580195861000     AL           2               13 High Food Security
##    PRNMCHLD
## 1         0
## 7         1
## 8         0
## 16        0
## 17        0
## 18        0

Reference to household income: 1 LESS THAN $5,000
2 5,000 TO 7,499
3 7,500 TO 9,999
4 10,000 TO 12,499
5 12,500 TO 14,999
6 15,000 TO 19,999
7 20,000 TO 24,999
8 25,000 TO 29,999
9 30,000 TO 34,999
10 35,000 TO 39,999
11 40,000 TO 49,999
12 50,000 TO 59,999
13 60,000 TO 74,999
14 75,000 TO 99,999
15 100,000 TO 149,999
16 150,000 OR MORE

income_t <- table(FS_Subset$Household_Income, FS_Subset$FoodSecurity_score)
chisq.test(income_t)
## 
##  Pearson's Chi-squared test
## 
## data:  income_t
## X-squared = 9512.9, df = 45, p-value < 2.2e-16

We can say that Household income is affecting food insecurity.

family_t <- table(FS_Subset$Family_Size, FS_Subset$FoodSecurity_score)
chisq.test(family_t)
## Warning in chisq.test(family_t): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  family_t
## X-squared = 1691.7, df = 39, p-value < 2.2e-16

We can say that Family size is affecting food insecurity.

boxplot(FoodSecurity_score ~ Family_Size, data = food_una, col=c("blue", "red", "green", "yellow", "purple", "orange"), main="Family size - Food security")

boxplot(FoodSecurity_score ~ Household_Income, data = food_una, col=c("blue", "red", "green", "yellow", "purple", "orange"), main="Household Income - Food security")

boxplot(FoodSecurity_score ~ States, data = food_una, col=c("blue", "red", "green", "yellow", "purple", "orange"))

As you can see from the boxplot, whenever family size bigger (more than 6 people), food insecurity is high. Also, household income has direct effect on food security. When household income is higher than 40k, food security score is low.

library(ggplot2)
qqplot(food_una$Household_Income, food_una$FoodSecurity_score, plot.it = TRUE,  xlab="Household income", ylab="Food security Score",  col=food_una$Household_Income, main="QQ Plot for Food security vs Household income")

summary(FS_Subset$Family_Size)
##     1     2     3     4     5     6     7     8     9    10    11    12    13 
##  9210 21816 12753 13724  7810  3660  1344   560   288   120    88    72    13 
##    14 
##    14
library(ggplot2)
qqplot(food_una$Family_Size, food_una$FoodSecurity_score, plot.it = TRUE,  xlab="Family size", ylab="Food security Score",  col=food_una$Family_Size, main="QQ Plot for Food security vs Family")

ggplot(data=food_una, mapping = aes(x = Family_Size, y = FoodSecurity_score)) + 
  geom_point(size=5)+aes(color = Family_Size) +
  theme_bw()

ggplot(data=food_una, mapping = aes(x = Family_Size, y = FoodSecurity_score)) + 
  geom_point(size=5)+aes(color = Household_Income) +
  theme_bw()